Data Visualization

Climate

Precipitation

Code
## read data and aggregate
df <- data.frame(matrix(ncol = 6, nrow = 0))
colnames(df) <- c("station","name","date","prcp","tmax","tmin")
for (i in 1:3){
  file_name <- paste0("climate",i,".csv")
  df_curr <- read.csv(paste0("./data/",file_name), header = T)
  colnames(df_curr) <- c("station","name","date","prcp","tmax","tmin")
  df <- rbind(df_curr,df)
}

## group by date: each day, we have one average prcp, tmax, and tmin based on all stations in Toledo

climate <- df %>%
  group_by(date) %>%
  summarise_at(vars(prcp,tmax,tmin),  mean, na.rm = TRUE)


climate$date<-as.Date(climate$date,"%Y-%m-%d")
## sanity check
# any(duplicated(climate$date))
# min(climate$date);max(climate$date)

### climate for turbidity model, 2021-11-24 to 2022-12-31

# select data after 2021-11-24 and before 2022-12-31
climate_trub <- climate[(climate$date >= "2021-11-24") & (climate$date <= "2022-12-31"), ]

## interpolate the missing data using moving average
climate_trub$prcp <- na_ma(climate_trub$prcp, k = 4, weighting = "exponential")
climate_trub$tmax <- na_ma(climate_trub$tmax, k = 4, weighting = "exponential")
climate_trub$tmin <- na_ma(climate_trub$tmin, k = 4, weighting = "exponential")

## sanity check
#str(climate_trub)
#sum(is.na(climate_trub))
#min(climate_trub$date);max(climate_trub$date)

## prcp 
prcp_trub_df <- data.frame(climate_trub$date,climate_trub$prcp)
colnames(prcp_trub_df) <- c("Date","Precipitation")

## prepare data for candlestick plot
prcp_summary <- prcp_trub_df %>%                               
  group_by(Date) %>% 
  summarize(min = min(Precipitation),
            q1 = quantile(Precipitation, 0.25),
            median = median(Precipitation),
            mean = mean(Precipitation),
            q3 = quantile(Precipitation, 0.75),
            max = max(Precipitation))

p2 <- plot_ly(prcp_summary,
              x = ~Date,
              y = ~mean,
              type = 'scatter', mode = 'lines')
p2 <- p2 %>% layout(title = "Daily Precipitation Time Series In Study Area",
      xaxis = list(rangeslider = list(visible = T)),
       yaxis = list(title = 'Average precipitation (in)'))
p2

Figure 1: Average precipitation in study area

Figure 1 shows the time series plot of the average precipitation (in liquid form) in the study area. The summer and fall are the season with high precipitation. However, this is not necessary indicate that winter and spring have less total precipitation, as the snow in the winter may also contribute to the total precipitation. There might be periodicity in the precipitation pattern, which will need more analysis to confirm.

Temperature

Figure 2 shows the daily maximum and minimum temperature in study area. The temperature has a generally increasing trend. Both maximum and minimum temperature have similar trends.

Code
## t
t_trub_df <- data.frame(climate_trub$date,climate_trub$tmax, climate_trub$tmin)

colnames(t_trub_df) <- c("Date","Tmax","Tmin")



p3 <- plot_ly(t_trub_df, type = 'scatter', mode = 'lines')%>%
  add_trace(x = ~Date, y = ~Tmax, name = "Max")%>%
  add_trace(x = ~Date, y = ~Tmin, name = "Min")%>%
  layout(title='Max and Min Temperature for Study area',
         xaxis = list(rangeslider = list(visible = T),
                      rangeselector=list(
                        buttons=list(
                          list(count=1, label="1m", step="month", stepmode="backward"),
                          list(count=6, label="6m", step="month", stepmode="backward"),
                          list(count=1, label="YTD", step="year", stepmode="todate"),
                          list(count=1, label="1y", step="year", stepmode="backward")
                        ))))

p3 <- p3 %>%
  layout(
    xaxis = list(zerolinecolor = '#ffff',
                 zerolinewidth = 2,
                 gridcolor = 'ffff'),
    yaxis = list(zerolinecolor = '#ffff',
                 zerolinewidth = 2,
                 gridcolor = 'ffff',
                 title = 'Temperature, F'),
    plot_bgcolor='#e5ecf6', margin = 0.2, width = 900)
p3

Figure 2: Maximum and minimum temperature in study area

Water quality

Stream discharge

Code
## import and processing data
discharge <- read.csv("./data/discharge.csv")
discharge <- discharge[discharge$X.!= "#",]
discharge <- discharge[3:dim(discharge)[1],]
colnames(discharge) <- c("prefix", "station","date","discharge","flag")
discharge = discharge[c("prefix", "station","date","discharge","flag")]
discharge$date<-as.Date(discharge$date,"%m/%d/%y")
discharge$date <- as.Date(ifelse(discharge$date > Sys.Date(), 
  format(discharge$date, "19%y-%m-%d"), 
  format(discharge$date)))
# head(discharge)
# dim(discharge)
# str(discharge)

# select data after 1970
discharge = discharge[discharge$date >= "1970-01-01", ]
# head(discharge)
# dim(discharge)
# str(discharge)

## interpolate the missing data using moving average
library(imputeTS)
discharge$discharge <- na_ma(discharge$discharge, k = 4, weighting = "exponential")

## sanity check
# str(discharge)
# sum(is.na(discharge))
Code
p <- plot_ly(discharge, type = 'scatter', mode = 'lines')%>%
  add_trace(x = ~date, y = ~discharge, name="discharge")%>%
  layout(title='Stream discharge at Waterville, OH station',
         xaxis = list(rangeslider = list(visible = T),
                      rangeselector=list(
                        buttons=list(
                          list(count=1, label="1m", step="month", stepmode="backward"),
                          list(count=6, label="6m", step="month", stepmode="backward"),
                          list(count=1, label="1y", step="year", stepmode="backward"),
                          list(count=5, label="5y", step="year", stepmode="backward"),
                          list(count=16, label="YTD", step="year", stepmode="todate")
                          )
                        )))

p <- p %>%
  layout(
    xaxis = list(zerolinecolor = '#ffff',
                 zerolinewidth = 2,
                 gridcolor = 'ffff',
                 title = "Date"),
    yaxis = list(zerolinecolor = '#ffff',
                 zerolinewidth = 2,
                 gridcolor = 'ffff',
                 title = "Discharge, ft<sup>3</sup>/s"),
    plot_bgcolor='#e5ecf6', margin = 0.2, width = 900)
p

Figure 3: Stream discharge at Waterville, OH station

Stream flow has clear periodicity according to Figure 3. There are some high value year, for example 2015. This is caused by the 2014–2016 El Niño event.

Turbidity

Code
## read data
turb <- read.csv("./data/turbidity.csv", header = F, comment.char = "#")
turb <- turb[3:nrow(turb),1:9]
colnames(turb) <- c("prefix", "station","date","max","flag1","min","flag2","mean","flag3")
turb$date<-as.Date(turb$date,"%m/%d/%y")

## extract daily max
turb_max<- turb[c("date","max")]
turb_max$date<-as.Date(turb_max$date,"%m/%d/%y")
turb_max$max = as.numeric(turb_max$max) 
## interpolate the missing data using moving average
turb_max$max <- na_ma(turb_max$max, k = 4, weighting = "exponential")

# min(turb_max$date);max(turb_max$date)

## extract to 12/31/2022 as the climate max date
turb_max = turb_max[turb_max$date <= "2022-12-31", ]
# head(turb_max)
# min(turb_max$date);max(turb_max$date)
## sanity check
# str(turb_max)
# sum(is.na(turb_max))

turb_df <- data.frame(turb_max$date,turb_max$max)
colnames(turb_df) <- c("Date","max")



## prepare data for candlestick plot
prcp_summary <- turb_df %>%                               
  group_by(Date) %>% 
  summarize(min = min(max),
            q1 = quantile(max, 0.25),
            median = median(max),
            mean = mean(max),
            q3 = quantile(max, 0.75),
            max = max(max))

p2 <- plot_ly(prcp_summary,
              x = ~Date,
              y = ~mean,
              type = 'scatter', mode = 'lines')
p2 <- p2 %>% layout(title = "Maximum turbidity in studied stream",
      xaxis = list(rangeslider = list(visible = T)),
       yaxis = list(title = 'Turbidity, NTU'))
p2

Figure 4: Maximum turbidity at studied stream

Xylem stock

Figure 5 shows the plot of Xylem stock prices from 2011 to 2023. We can see the price has increased more than triple since 2014.

Code
## read data since 2011
xyl <- getSymbols("XYL",auto.assign = FALSE, from = "2011-10-14",src="yahoo")
xyl$date<-as.Date(xyl$date,"%Y-%m-%d")

chartSeries(xyl, theme = chartTheme("white"), # Theme
            bar.type = "hlc",  # High low close 
            up.col = "green",  # Up candle color
            dn.col = "red")   # Down candle color)


xyl.close<- Ad(xyl)

Figure 5: Xylem Inc. stock price 2011-2023